from pyspark import SparkConf, SparkContext
import os
# Word-count job: read a text file, count how often each word occurs, and
# print the (word, count) pairs ordered from most to least frequent.

# Tell PySpark which Python interpreter to use for its worker processes.
os.environ['PYSPARK_PYTHON'] = r"D://Python/Python3107/python.exe"

# Run Spark in local mode ("local[*]") under the application name "test_spark".
conf = SparkConf().setMaster("local[*]").setAppName("test_spark")
sc = SparkContext(conf=conf)

try:
    # Load the input file as an RDD with one element per line.
    rdd = sc.textFile(r"D://itheima/Python/pyspark案例/hello.txt")
    # Break every line into words. split() with no argument splits on any run
    # of whitespace, so blank lines and repeated spaces do not produce
    # empty-string "words" that would otherwise be counted.
    rdd2 = rdd.flatMap(lambda line: line.split())
    # Count the occurrences of each word.
    rdd3 = rdd2.map(lambda word: (word, 1)).reduceByKey(lambda a, b: a + b)
    # Order by count, highest first; numPartitions=1 places the sorted
    # result in a single partition.
    rdd3 = rdd3.sortBy(lambda pair: pair[1], ascending=False, numPartitions=1)
    print(rdd3.collect())
finally:
    # SparkContext has no close() method; stop() is the shutdown call.
    # Running it in `finally` releases the context even if the job fails.
    sc.stop()